lib_list <- c("readxl", "dplyr", "ggplot2",
"car", "multcomp", "caret",
"vegan", "plotly", "DESeq2",
"EnhancedVolcano")
load_libs <- function(lib_list) {
installed_libs <- lib_list %in% rownames(installed.packages())
if (any(installed_libs == F)) {
install.packages(lib_list[!installed_libs])
}
invisible(lapply(lib_list, library, character.only = T))
}
load_libs(lib_list)
## Warning: пакет 'S4Vectors' был собран под R версии 4.1.3
“Expression levels of 77 proteins measured in the cerebral cortex of 8 classes of control and Down syndrome mice exposed to context fear conditioning, a task used to assess associative learning.”
data = read_xls("../data/raw_data.xls", na = 'NA')
str(data)
## tibble [1,080 × 82] (S3: tbl_df/tbl/data.frame)
## $ MouseID : chr [1:1080] "309_1" "309_2" "309_3" "309_4" ...
## $ DYRK1A_N : num [1:1080] 0.504 0.515 0.509 0.442 0.435 ...
## $ ITSN1_N : num [1:1080] 0.747 0.689 0.73 0.617 0.617 ...
## $ BDNF_N : num [1:1080] 0.43 0.412 0.418 0.359 0.359 ...
## $ NR1_N : num [1:1080] 2.82 2.79 2.69 2.47 2.37 ...
## $ NR2A_N : num [1:1080] 5.99 5.69 5.62 4.98 4.72 ...
## $ pAKT_N : num [1:1080] 0.219 0.212 0.209 0.223 0.213 ...
## $ pBRAF_N : num [1:1080] 0.178 0.173 0.176 0.176 0.174 ...
## $ pCAMKII_N : num [1:1080] 2.37 2.29 2.28 2.15 2.13 ...
## $ pCREB_N : num [1:1080] 0.232 0.227 0.23 0.207 0.192 ...
## $ pELK_N : num [1:1080] 1.75 1.6 1.56 1.6 1.5 ...
## $ pERK_N : num [1:1080] 0.688 0.695 0.677 0.583 0.551 ...
## $ pJNK_N : num [1:1080] 0.306 0.299 0.291 0.297 0.287 ...
## $ PKCA_N : num [1:1080] 0.403 0.386 0.381 0.377 0.364 ...
## $ pMEK_N : num [1:1080] 0.297 0.281 0.282 0.314 0.278 ...
## $ pNR1_N : num [1:1080] 1.022 0.957 1.004 0.875 0.865 ...
## $ pNR2A_N : num [1:1080] 0.606 0.588 0.602 0.52 0.508 ...
## $ pNR2B_N : num [1:1080] 1.88 1.73 1.73 1.57 1.48 ...
## $ pPKCAB_N : num [1:1080] 2.31 2.04 2.02 2.13 2.01 ...
## $ pRSK_N : num [1:1080] 0.442 0.445 0.468 0.478 0.483 ...
## $ AKT_N : num [1:1080] 0.859 0.835 0.814 0.728 0.688 ...
## $ BRAF_N : num [1:1080] 0.416 0.4 0.4 0.386 0.368 ...
## $ CAMKII_N : num [1:1080] 0.37 0.356 0.368 0.363 0.355 ...
## $ CREB_N : num [1:1080] 0.179 0.174 0.174 0.179 0.175 ...
## $ ELK_N : num [1:1080] 1.87 1.76 1.77 1.29 1.32 ...
## $ ERK_N : num [1:1080] 3.69 3.49 3.57 2.97 2.9 ...
## $ GSK3B_N : num [1:1080] 1.54 1.51 1.5 1.42 1.36 ...
## $ JNK_N : num [1:1080] 0.265 0.256 0.26 0.26 0.251 ...
## $ MEK_N : num [1:1080] 0.32 0.304 0.312 0.279 0.274 ...
## $ TRKA_N : num [1:1080] 0.814 0.781 0.785 0.734 0.703 ...
## $ RSK_N : num [1:1080] 0.166 0.157 0.161 0.162 0.155 ...
## $ APP_N : num [1:1080] 0.454 0.431 0.423 0.411 0.399 ...
## $ Bcatenin_N : num [1:1080] 3.04 2.92 2.94 2.5 2.46 ...
## $ SOD1_N : num [1:1080] 0.37 0.342 0.344 0.345 0.329 ...
## $ MTOR_N : num [1:1080] 0.459 0.424 0.425 0.429 0.409 ...
## $ P38_N : num [1:1080] 0.335 0.325 0.325 0.33 0.313 ...
## $ pMTOR_N : num [1:1080] 0.825 0.762 0.757 0.747 0.692 ...
## $ DSCR1_N : num [1:1080] 0.577 0.545 0.544 0.547 0.537 ...
## $ AMPKA_N : num [1:1080] 0.448 0.421 0.405 0.387 0.361 ...
## $ NR2B_N : num [1:1080] 0.586 0.545 0.553 0.548 0.513 ...
## $ pNUMB_N : num [1:1080] 0.395 0.368 0.364 0.367 0.352 ...
## $ RAPTOR_N : num [1:1080] 0.34 0.322 0.313 0.328 0.312 ...
## $ TIAM1_N : num [1:1080] 0.483 0.455 0.447 0.443 0.419 ...
## $ pP70S6_N : num [1:1080] 0.294 0.276 0.257 0.399 0.393 ...
## $ NUMB_N : num [1:1080] 0.182 0.182 0.184 0.162 0.16 ...
## $ P70S6_N : num [1:1080] 0.843 0.848 0.856 0.76 0.768 ...
## $ pGSK3B_N : num [1:1080] 0.193 0.195 0.201 0.184 0.186 ...
## $ pPKCG_N : num [1:1080] 1.44 1.44 1.52 1.61 1.65 ...
## $ CDK5_N : num [1:1080] 0.295 0.294 0.302 0.296 0.297 ...
## $ S6_N : num [1:1080] 0.355 0.355 0.386 0.291 0.309 ...
## $ ADARB1_N : num [1:1080] 1.34 1.31 1.28 1.2 1.21 ...
## $ AcetylH3K9_N : num [1:1080] 0.17 0.171 0.185 0.16 0.165 ...
## $ RRP1_N : num [1:1080] 0.159 0.158 0.149 0.166 0.161 ...
## $ BAX_N : num [1:1080] 0.189 0.185 0.191 0.185 0.188 ...
## $ ARC_N : num [1:1080] 0.106 0.107 0.108 0.103 0.105 ...
## $ ERBB4_N : num [1:1080] 0.145 0.15 0.145 0.141 0.142 ...
## $ nNOS_N : num [1:1080] 0.177 0.178 0.176 0.164 0.168 ...
## $ Tau_N : num [1:1080] 0.125 0.134 0.133 0.123 0.137 ...
## $ GFAP_N : num [1:1080] 0.115 0.118 0.118 0.117 0.116 ...
## $ GluR3_N : num [1:1080] 0.228 0.238 0.245 0.235 0.256 ...
## $ GluR4_N : num [1:1080] 0.143 0.142 0.142 0.145 0.141 ...
## $ IL1B_N : num [1:1080] 0.431 0.457 0.51 0.431 0.481 ...
## $ P3525_N : num [1:1080] 0.248 0.258 0.255 0.251 0.252 ...
## $ pCASP9_N : num [1:1080] 1.6 1.67 1.66 1.48 1.53 ...
## $ PSD95_N : num [1:1080] 2.01 2 2.02 1.96 2.01 ...
## $ SNCA_N : num [1:1080] 0.108 0.11 0.108 0.12 0.12 ...
## $ Ubiquitin_N : num [1:1080] 1.045 1.01 0.997 0.99 0.998 ...
## $ pGSK3B_Tyr216_N: num [1:1080] 0.832 0.849 0.847 0.833 0.879 ...
## $ SHH_N : num [1:1080] 0.189 0.2 0.194 0.192 0.206 ...
## $ BAD_N : num [1:1080] 0.123 0.117 0.119 0.133 0.13 ...
## $ BCL2_N : num [1:1080] NA NA NA NA NA NA NA NA NA NA ...
## $ pS6_N : num [1:1080] 0.106 0.107 0.108 0.103 0.105 ...
## $ pCFOS_N : num [1:1080] 0.108 0.104 0.106 0.111 0.111 ...
## $ SYP_N : num [1:1080] 0.427 0.442 0.436 0.392 0.434 ...
## $ H3AcK18_N : num [1:1080] 0.115 0.112 0.112 0.13 0.118 ...
## $ EGR1_N : num [1:1080] 0.132 0.135 0.133 0.147 0.14 ...
## $ H3MeK4_N : num [1:1080] 0.128 0.131 0.127 0.147 0.148 ...
## $ CaNA_N : num [1:1080] 1.68 1.74 1.93 1.7 1.84 ...
## $ Genotype : chr [1:1080] "Control" "Control" "Control" "Control" ...
## $ Treatment : chr [1:1080] "Memantine" "Memantine" "Memantine" "Memantine" ...
## $ Behavior : chr [1:1080] "C/S" "C/S" "C/S" "C/S" ...
## $ class : chr [1:1080] "c-CS-m" "c-CS-m" "c-CS-m" "c-CS-m" ...
data$Genotype <- as.factor(data$Genotype)
data$Treatment <- as.factor(data$Treatment)
data$Behavior <- as.factor(data$Behavior)
data$class <- as.factor(data$class)
So, here are 72 mice (38 control and 34 trisomic (Down syndrome)). In the experiments, 15 measurements were registered of each protein per sample/mouse.The dataset contains a total of 1080 measurements per protein. Each measurement can be considered as an independent sample/mouse.
We can find the following groups:
##
## Control Ts65Dn
## 570 510
##
## Memantine Saline
## 570 510
##
## C/S S/C
## 525 555
Some mice have been stimulated to learn (context-shock) and others have not (shock-context).
##
## c-CS-m c-CS-s c-SC-m c-SC-s t-CS-m t-CS-s t-SC-m t-SC-s
## 150 135 150 135 135 105 135 135
They are quite balanced
colSums(is.na(data))
## MouseID DYRK1A_N ITSN1_N BDNF_N NR1_N
## 0 3 3 3 3
## NR2A_N pAKT_N pBRAF_N pCAMKII_N pCREB_N
## 3 3 3 3 3
## pELK_N pERK_N pJNK_N PKCA_N pMEK_N
## 3 3 3 3 3
## pNR1_N pNR2A_N pNR2B_N pPKCAB_N pRSK_N
## 3 3 3 3 3
## AKT_N BRAF_N CAMKII_N CREB_N ELK_N
## 3 3 3 3 18
## ERK_N GSK3B_N JNK_N MEK_N TRKA_N
## 3 3 3 7 3
## RSK_N APP_N Bcatenin_N SOD1_N MTOR_N
## 3 3 18 3 3
## P38_N pMTOR_N DSCR1_N AMPKA_N NR2B_N
## 3 3 3 3 3
## pNUMB_N RAPTOR_N TIAM1_N pP70S6_N NUMB_N
## 3 3 3 3 0
## P70S6_N pGSK3B_N pPKCG_N CDK5_N S6_N
## 0 0 0 0 0
## ADARB1_N AcetylH3K9_N RRP1_N BAX_N ARC_N
## 0 0 0 0 0
## ERBB4_N nNOS_N Tau_N GFAP_N GluR3_N
## 0 0 0 0 0
## GluR4_N IL1B_N P3525_N pCASP9_N PSD95_N
## 0 0 0 0 0
## SNCA_N Ubiquitin_N pGSK3B_Tyr216_N SHH_N BAD_N
## 0 0 0 0 213
## BCL2_N pS6_N pCFOS_N SYP_N H3AcK18_N
## 285 0 75 0 180
## EGR1_N H3MeK4_N CaNA_N Genotype Treatment
## 210 270 0 0 0
## Behavior class
## 0 0
Missing values are found only in numeric columns with protein levels.
data_BDNF_without_na <- subset(data, !is.na(data$BDNF_N))
data_BDNF_without_na %>%
group_by(class) %>%
summarise(mean_BDNF_N = mean(BDNF_N),
sd_BDNF_N = sd(BDNF_N))
## # A tibble: 8 × 3
## class mean_BDNF_N sd_BDNF_N
## <fct> <dbl> <dbl>
## 1 c-CS-m 0.339 0.0469
## 2 c-CS-s 0.342 0.0549
## 3 c-SC-m 0.291 0.0386
## 4 c-SC-s 0.313 0.0436
## 5 t-CS-m 0.313 0.0512
## 6 t-CS-s 0.305 0.0433
## 7 t-SC-m 0.321 0.0332
## 8 t-SC-s 0.326 0.0575
mod_data <- lm(BDNF_N ~ class,
data = data_BDNF_without_na)
summary(mod_data)
##
## Call:
## lm(formula = BDNF_N ~ class, data = data_BDNF_without_na)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.175764 -0.028777 -0.001609 0.028701 0.159388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.339217 0.003817 88.871 < 2e-16 ***
## classc-CS-s 0.003098 0.005546 0.559 0.5766
## classc-SC-m -0.048272 0.005398 -8.942 < 2e-16 ***
## classc-SC-s -0.025825 0.005546 -4.657 3.62e-06 ***
## classt-CS-m -0.026485 0.005546 -4.776 2.04e-06 ***
## classt-CS-s -0.033757 0.005948 -5.675 1.78e-08 ***
## classt-SC-m -0.018154 0.005546 -3.273 0.0011 **
## classt-SC-s -0.013631 0.005579 -2.443 0.0147 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04675 on 1069 degrees of freedom
## Multiple R-squared: 0.1097, Adjusted R-squared: 0.1039
## F-statistic: 18.82 on 7 and 1069 DF, p-value: < 2.2e-16
data_anova <- Anova(mod_data)
data_anova
## Anova Table (Type II tests)
##
## Response: BDNF_N
## Sum Sq Df F value Pr(>F)
## class 0.28784 7 18.816 < 2.2e-16 ***
## Residuals 2.33619 1069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The level of BDNF protein significantly depends on the experimental class (F = 18.8160539, p_value = 8.8566341^{-24}, df_1 = 7, df_2 = 1069).
post_hoc <- glht(mod_data,
linfct = mcp(class = "Tukey"))
result_hoc <- summary(post_hoc)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
result_hoc
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lm(formula = BDNF_N ~ class, data = data_BDNF_without_na)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## c-CS-s - c-CS-m == 0 0.0030979 0.0055459 0.559 0.99930
## c-SC-m - c-CS-m == 0 -0.0482717 0.0053980 -8.942 < 0.001 ***
## c-SC-s - c-CS-m == 0 -0.0258249 0.0055459 -4.657 < 0.001 ***
## t-CS-m - c-CS-m == 0 -0.0264852 0.0055459 -4.776 < 0.001 ***
## t-CS-s - c-CS-m == 0 -0.0337570 0.0059483 -5.675 < 0.001 ***
## t-SC-m - c-CS-m == 0 -0.0181541 0.0055459 -3.273 0.02416 *
## t-SC-s - c-CS-m == 0 -0.0136310 0.0055790 -2.443 0.22098
## c-SC-m - c-CS-s == 0 -0.0513696 0.0055459 -9.263 < 0.001 ***
## c-SC-s - c-CS-s == 0 -0.0289228 0.0056900 -5.083 < 0.001 ***
## t-CS-m - c-CS-s == 0 -0.0295831 0.0056900 -5.199 < 0.001 ***
## t-CS-s - c-CS-s == 0 -0.0368549 0.0060829 -6.059 < 0.001 ***
## t-SC-m - c-CS-s == 0 -0.0212520 0.0056900 -3.735 0.00498 **
## t-SC-s - c-CS-s == 0 -0.0167289 0.0057223 -2.923 0.06882 .
## c-SC-s - c-SC-m == 0 0.0224468 0.0055459 4.047 0.00136 **
## t-CS-m - c-SC-m == 0 0.0217865 0.0055459 3.928 0.00233 **
## t-CS-s - c-SC-m == 0 0.0145147 0.0059483 2.440 0.22245
## t-SC-m - c-SC-m == 0 0.0301176 0.0055459 5.431 < 0.001 ***
## t-SC-s - c-SC-m == 0 0.0346406 0.0055790 6.209 < 0.001 ***
## t-CS-m - c-SC-s == 0 -0.0006603 0.0056900 -0.116 1.00000
## t-CS-s - c-SC-s == 0 -0.0079321 0.0060829 -1.304 0.89711
## t-SC-m - c-SC-s == 0 0.0076708 0.0056900 1.348 0.87977
## t-SC-s - c-SC-s == 0 0.0121939 0.0057223 2.131 0.39514
## t-CS-s - t-CS-m == 0 -0.0072718 0.0060829 -1.195 0.93312
## t-SC-m - t-CS-m == 0 0.0083311 0.0056900 1.464 0.82594
## t-SC-s - t-CS-m == 0 0.0128542 0.0057223 2.246 0.32368
## t-SC-m - t-CS-s == 0 0.0156029 0.0060829 2.565 0.16934
## t-SC-s - t-CS-s == 0 0.0201260 0.0061130 3.292 0.02297 *
## t-SC-s - t-SC-m == 0 0.0045231 0.0057223 0.790 0.99360
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
For pairwise multiple comparisons post-hoc paired Tukey test was used (df = 1069). We observed significant differences between several classes (here we listed groups with only one factor changing, where interpretation is more clear):
BDNF is a member of the neurotrophin family of growth factors, which are related to the canonical nerve growth factor. Control mice, stimulated to learn, demonstrate the highest level of BDNF protein (Brain-derived neurotrophic factor), in spite of treatment type.
Memantine is an NMDA receptor antagonist, that works by decreasing abnormal activity in the brain. According to the article, memantine was shown to rescue performance of the Ts65Dn in several learning and memory tasks.
Our results show that learning stimulation increases BDNF level in control mice (that is in agreement with previous data), but decreases it in trisomy mice (regardless of treatment type). Interestingly, memantine decreases BDNF protein level in control mice, not stimulated to learn. Furthermore, the analysis between control and trisomy groups, stimulated to learn, have revealed that the level of BDNF protein content is smaller in trisomy mice, regardless of treatment type. However, not stimulated trisomy mice exposed to memantine treatment show increased level of BDNF in comparison with contol group.
data_without_na <-na.omit(data)
# remove Mouse_ID column
data_without_na_for_ERBB4 <- data_without_na[, 2:82]
# split the data into training and test set
set.seed(123)
training_samples <-
data_without_na_for_ERBB4$ERBB4_N %>%
createDataPartition(p = 0.8, list = FALSE)
train_data <-
data_without_na_for_ERBB4[training_samples, ]
test_data <-
data_without_na_for_ERBB4[-training_samples, ]
ERBB4_pred_model <- lm(ERBB4_N ~ .,
data = train_data)
summary(ERBB4_pred_model)
##
## Call:
## lm(formula = ERBB4_N ~ ., data = train_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0214510 -0.0028381 -0.0002578 0.0030123 0.0272713
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0261944 0.0099148 2.642 0.008601 **
## DYRK1A_N -0.0111829 0.0080486 -1.389 0.165560
## ITSN1_N -0.0106220 0.0132604 -0.801 0.423639
## BDNF_N 0.0501510 0.0208316 2.407 0.016566 *
## NR1_N -0.0176050 0.0076925 -2.289 0.022680 *
## NR2A_N -0.0004137 0.0015023 -0.275 0.783201
## pAKT_N 0.0932389 0.0343903 2.711 0.007024 **
## pBRAF_N -0.0760286 0.0339889 -2.237 0.025905 *
## pCAMKII_N 0.0020648 0.0010152 2.034 0.042687 *
## pCREB_N -0.0442114 0.0336857 -1.312 0.190197
## pELK_N 0.0005901 0.0010747 0.549 0.583261
## pERK_N -0.0076356 0.0095159 -0.802 0.422845
## pJNK_N -0.0664517 0.0285649 -2.326 0.020554 *
## PKCA_N -0.0121265 0.0306691 -0.395 0.692782
## pMEK_N -0.0113037 0.0310430 -0.364 0.715973
## pNR1_N -0.0181065 0.0158934 -1.139 0.255355
## pNR2A_N -0.0001541 0.0086754 -0.018 0.985841
## pNR2B_N 0.0147342 0.0078469 1.878 0.061228 .
## pPKCAB_N 0.0032716 0.0032043 1.021 0.307925
## pRSK_N 0.0022576 0.0169152 0.133 0.893897
## AKT_N 0.0089051 0.0099235 0.897 0.370119
## BRAF_N 0.0171293 0.0145427 1.178 0.239627
## CAMKII_N -0.0175886 0.0234728 -0.749 0.454154
## CREB_N 0.0201630 0.0359409 0.561 0.575142
## ELK_N 0.0030979 0.0042569 0.728 0.467244
## ERK_N -0.0002002 0.0032365 -0.062 0.950703
## GSK3B_N -0.0172864 0.0092741 -1.864 0.063140 .
## JNK_N -0.0158671 0.0409648 -0.387 0.698736
## MEK_N 0.0303950 0.0262379 1.158 0.247450
## TRKA_N 0.0105654 0.0189476 0.558 0.577454
## RSK_N 0.0508162 0.0460985 1.102 0.271048
## APP_N -0.0272689 0.0228060 -1.196 0.232604
## Bcatenin_N 0.0137664 0.0061961 2.222 0.026918 *
## SOD1_N -0.0054085 0.0045275 -1.195 0.233034
## MTOR_N 0.0413274 0.0191100 2.163 0.031227 *
## P38_N -0.0032480 0.0152839 -0.213 0.831830
## pMTOR_N -0.0143343 0.0086706 -1.653 0.099156 .
## DSCR1_N 0.0151403 0.0123787 1.223 0.222091
## AMPKA_N 0.0513835 0.0267619 1.920 0.055642 .
## NR2B_N 0.0106815 0.0104132 1.026 0.305690
## pNUMB_N -0.0529454 0.0290815 -1.821 0.069497 .
## RAPTOR_N 0.0536374 0.0275528 1.947 0.052345 .
## TIAM1_N -0.0342437 0.0237459 -1.442 0.150144
## pP70S6_N 0.0053228 0.0063682 0.836 0.403796
## NUMB_N -0.0314378 0.0437424 -0.719 0.472789
## P70S6_N -0.0043131 0.0065700 -0.656 0.511932
## pGSK3B_N 0.1209513 0.0475316 2.545 0.011354 *
## pPKCG_N -0.0061634 0.0022786 -2.705 0.007155 **
## CDK5_N -0.0016036 0.0102196 -0.157 0.875400
## S6_N 0.0142276 0.0089512 1.589 0.112831
## ADARB1_N -0.0014272 0.0023056 -0.619 0.536279
## AcetylH3K9_N -0.0114964 0.0125481 -0.916 0.360178
## RRP1_N -0.0508979 0.0270387 -1.882 0.060585 .
## BAX_N -0.0458340 0.0444971 -1.030 0.303678
## ARC_N 0.1678261 0.0778117 2.157 0.031679 *
## nNOS_N 0.0200351 0.0345502 0.580 0.562355
## Tau_N 0.0798833 0.0230796 3.461 0.000602 ***
## GFAP_N -0.0422735 0.0637924 -0.663 0.507963
## GluR3_N 0.0108325 0.0294415 0.368 0.713138
## GluR4_N -0.0773386 0.0432524 -1.788 0.074603 .
## IL1B_N 0.0419986 0.0133267 3.151 0.001760 **
## P3525_N 0.0133468 0.0305957 0.436 0.662929
## pCASP9_N 0.0060419 0.0038587 1.566 0.118275
## PSD95_N 0.0190499 0.0042423 4.490 9.58e-06 ***
## SNCA_N -0.0090130 0.0419516 -0.215 0.830012
## Ubiquitin_N 0.0016481 0.0063138 0.261 0.794217
## pGSK3B_Tyr216_N 0.0425853 0.0130005 3.276 0.001156 **
## SHH_N 0.0254733 0.0236638 1.076 0.282439
## BAD_N -0.1047916 0.0412644 -2.540 0.011519 *
## BCL2_N 0.0291155 0.0335944 0.867 0.386694
## pS6_N NA NA NA NA
## pCFOS_N -0.0271498 0.0359647 -0.755 0.450800
## SYP_N 0.0224881 0.0122682 1.833 0.067620 .
## H3AcK18_N 0.0284484 0.0309120 0.920 0.358030
## EGR1_N 0.0200172 0.0316568 0.632 0.527580
## H3MeK4_N -0.0068933 0.0262898 -0.262 0.793313
## CaNA_N -0.0031680 0.0044082 -0.719 0.472812
## GenotypeTs65Dn 0.0061792 0.0055797 1.107 0.268844
## TreatmentSaline 0.0030326 0.0039685 0.764 0.445256
## BehaviorS/C -0.0173670 0.0044434 -3.908 0.000111 ***
## classc-CS-s -0.0005347 0.0049053 -0.109 0.913254
## classc-SC-m 0.0030384 0.0048681 0.624 0.532928
## classc-SC-s -0.0021493 0.0082647 -0.260 0.794969
## classt-CS-m -0.0067418 0.0050586 -1.333 0.183453
## classt-CS-s NA NA NA NA
## classt-SC-m NA NA NA NA
## classt-SC-s NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005755 on 361 degrees of freedom
## Multiple R-squared: 0.8636, Adjusted R-squared: 0.8326
## F-statistic: 27.88 on 82 and 361 DF, p-value: < 2.2e-16
Metrics look good
predictions <- ERBB4_pred_model %>%
predict(test_data)
## Warning in predict.lm(., test_data): prediction from a rank-deficient fit may be
## misleading
# prediction error, RMSE
RMSE(predictions, test_data$ERBB4_N)
## [1] 0.00674468
# r-square
R2(predictions, test_data$ERBB4_N)
## [1] 0.8540674
Model shows rather goof performance
ERBB4 is a receptor tyrosine kinase, a member of the epidermal growth factor receptor family. It plays an essential role as cell surface receptor for neuregulins and EGF family members and regulates development of the central nervous system.
Let’s find out what are the best predictors for this target:
importance <- varImp(ERBB4_pred_model, scale=FALSE)
# summarize importance
importance %>% arrange(desc(Overall)) %>% top_n(10)
## Selecting by Overall
## Overall
## PSD95_N 4.490460
## BehaviorS/C 3.908485
## Tau_N 3.461210
## pGSK3B_Tyr216_N 3.275663
## IL1B_N 3.151463
## pAKT_N 2.711202
## pPKCG_N 2.704970
## pGSK3B_N 2.544651
## BAD_N 2.539517
## BDNF_N 2.407448
So, the most important interactors/effectors, that are connected with ERBB4 content are PSD95(postsynaptic density protein 95, membrane-associated guanylate kinase), behavior type (stimulated/notstimulated to learn), Tau protein (maintains the stability of microtubules in axons), pGSK3B_Tyr216 (glycogen synthase kinase-3 in phossphorylated form), IL1B (interleukin 1 beta), kinase AKT (there is a data, showing that ERBB4 activation promotes AKT signaling!) and other, including BDNF, described earlier.
data_num <-
data_without_na %>% select_if(is.numeric)
data_pca <- rda(data_num, scale = TRUE)
head(summary(data_pca))
##
## Call:
## rda(X = data_num, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 77 1
## Unconstrained 77 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 23.0488 13.3587 7.58646 7.16154 3.28242 3.04234 2.36733
## Proportion Explained 0.2993 0.1735 0.09853 0.09301 0.04263 0.03951 0.03074
## Cumulative Proportion 0.2993 0.4728 0.57135 0.66436 0.70699 0.74650 0.77724
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Eigenvalue 2.25528 1.83075 1.27872 1.11166 0.98313 0.86294 0.700922
## Proportion Explained 0.02929 0.02378 0.01661 0.01444 0.01277 0.01121 0.009103
## Cumulative Proportion 0.80653 0.83031 0.84691 0.86135 0.87412 0.88533 0.894428
## PC15 PC16 PC17 PC18 PC19 PC20
## Eigenvalue 0.611169 0.569217 0.520420 0.484876 0.418534 0.399046
## Proportion Explained 0.007937 0.007392 0.006759 0.006297 0.005436 0.005182
## Cumulative Proportion 0.902366 0.909758 0.916517 0.922814 0.928249 0.933432
## PC21 PC22 PC23 PC24 PC25 PC26
## Eigenvalue 0.387367 0.339991 0.319123 0.292254 0.259013 0.241727
## Proportion Explained 0.005031 0.004415 0.004144 0.003796 0.003364 0.003139
## Cumulative Proportion 0.938462 0.942878 0.947022 0.950818 0.954182 0.957321
## PC27 PC28 PC29 PC30 PC31 PC32
## Eigenvalue 0.216222 0.20709 0.182731 0.166772 0.150604 0.136755
## Proportion Explained 0.002808 0.00269 0.002373 0.002166 0.001956 0.001776
## Cumulative Proportion 0.960129 0.96282 0.965192 0.967358 0.969314 0.971090
## PC33 PC34 PC35 PC36 PC37 PC38
## Eigenvalue 0.133922 0.124826 0.123532 0.112486 0.105178 0.09622
## Proportion Explained 0.001739 0.001621 0.001604 0.001461 0.001366 0.00125
## Cumulative Proportion 0.972829 0.974450 0.976054 0.977515 0.978881 0.98013
## PC39 PC40 PC41 PC42 PC43 PC44
## Eigenvalue 0.091371 0.091173 0.082556 0.079101 0.0734032 0.0725823
## Proportion Explained 0.001187 0.001184 0.001072 0.001027 0.0009533 0.0009426
## Cumulative Proportion 0.981317 0.982501 0.983574 0.984601 0.9855541 0.9864968
## PC45 PC46 PC47 PC48 PC49
## Eigenvalue 0.0680414 0.0645015 0.0631263 0.0577969 0.0547784
## Proportion Explained 0.0008837 0.0008377 0.0008198 0.0007506 0.0007114
## Cumulative Proportion 0.9873804 0.9882181 0.9890379 0.9897885 0.9904999
## PC50 PC51 PC52 PC53 PC54
## Eigenvalue 0.0522431 0.0500933 0.0466117 0.0456046 0.0443441
## Proportion Explained 0.0006785 0.0006506 0.0006053 0.0005923 0.0005759
## Cumulative Proportion 0.9911784 0.9918290 0.9924343 0.9930266 0.9936025
## PC55 PC56 PC57 PC58 PC59
## Eigenvalue 0.0410688 0.0401500 0.0365504 0.0330063 0.0322246
## Proportion Explained 0.0005334 0.0005214 0.0004747 0.0004287 0.0004185
## Cumulative Proportion 0.9941359 0.9946573 0.9951320 0.9955606 0.9959791
## PC60 PC61 PC62 PC63 PC64
## Eigenvalue 0.0314064 0.0306337 0.0281387 0.0268911 0.0242670
## Proportion Explained 0.0004079 0.0003978 0.0003654 0.0003492 0.0003152
## Cumulative Proportion 0.9963870 0.9967848 0.9971503 0.9974995 0.9978147
## PC65 PC66 PC67 PC68 PC69
## Eigenvalue 0.0219850 0.0199656 0.0187510 0.018173 0.0167247
## Proportion Explained 0.0002855 0.0002593 0.0002435 0.000236 0.0002172
## Cumulative Proportion 0.9981002 0.9983595 0.9986030 0.998839 0.9990562
## PC70 PC71 PC72 PC73 PC74 PC75
## Eigenvalue 0.0139096 0.012476 0.0115619 0.0111135 0.0088829 0.008621
## Proportion Explained 0.0001806 0.000162 0.0001502 0.0001443 0.0001154 0.000112
## Cumulative Proportion 0.9992369 0.999399 0.9995490 0.9996934 0.9998087 0.999921
## PC76
## Eigenvalue 6.107e-03
## Proportion Explained 7.931e-05
## Cumulative Proportion 1.000e+00
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 14.35194
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## DYRK1A_N -0.3947 -1.1286 0.15966 -0.5175 0.41353 -0.31216
## ITSN1_N -0.7147 -1.1499 0.02458 -0.3317 0.32120 -0.44469
## BDNF_N -1.4439 -0.3316 0.13718 -0.2828 0.07774 0.02153
## NR1_N -1.4547 -0.2890 0.38531 0.2480 -0.13354 0.07702
## NR2A_N -1.3164 -0.3703 0.56759 0.2723 -0.02731 0.04109
## pAKT_N -1.0192 0.8742 -0.23253 -0.6059 -0.35846 0.03465
## ....
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## sit1 -0.7976 -0.5826 0.143685 0.5628 0.8218 0.3433
## sit2 -0.7412 -0.4971 0.058363 0.6059 1.0285 0.4438
## sit3 -0.8751 -0.5060 0.110153 0.4895 1.0174 0.3225
## sit4 -0.3027 -0.5902 0.194312 0.3097 0.1477 0.5615
## sit5 -0.3677 -0.4779 0.003043 0.3519 0.5272 0.5926
## sit6 -0.3706 -0.4671 -0.014077 0.4099 0.4814 0.4927
## ....
biplot(data_pca, scaling = "sites", display = "sites")
df_scores <- data.frame(data_without_na,
scores(data_pca,
display = "sites",
choices = c(1, 2, 3),
scaling = "sites"))
p_scores <- ggplot(df_scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = class), alpha = 0.5) +
coord_equal(xlim = c(-1.2, 1.2),
ylim = c(-1.2, 1.2)) +
ggtitle(label="Ordination plot of the first two axes of PCA") +
theme_bw()
p_scores
biplot(data_pca, scaling = "species",
display = "species")
pca_summary <- summary(data_pca)
pca_result <- as.data.frame(pca_summary$cont)
plot_data <- as.data.frame(t(pca_result[c("Proportion Explained"),]))
plot_data$component <- rownames(plot_data)
plot_data$component <-
gsub("importance.", "",
as.character(plot_data$component))
plot_data <- plot_data %>%
slice_max(`Proportion Explained`, n = 15)
ggplot(plot_data,
aes(reorder(component, -`Proportion Explained`),
`Proportion Explained`, fill = "pink")) +
geom_bar(stat = "identity") +
labs(x = "Component") + theme_bw() +
theme(legend.title = element_blank()) +
theme(legend.position = "none")
prin_comp <- prcomp(data_num, rank. = 3)
components <- prin_comp[["x"]]
components <- data.frame(components)
components$PC2 <- -components$PC2
components$PC3 <- -components$PC3
components = cbind(components, data_without_na$class)
explained_variance_ratio <- summary(prin_comp)[["importance"]]['Proportion of Variance', 1:3]
explained_variance_ratio <- 100 * sum(explained_variance_ratio)
tit = paste('Explained Variance',
explained_variance_ratio)
fig <- plot_ly(components, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~data_without_na$class,
colors = c('#636EFA','#EF553B',
'#00CC96')) %>%
add_markers(size = 12)
fig <- fig %>%
layout(
title = tit,
scene = list(bgcolor = "#e5ecf6")
)
fig
We will use DESeq2 analysis. Here we need to compare groups pair-wise
data_for_deseq <- data_without_na %>%
select(!c(MouseID, Genotype,
Behavior, Treatment, class))
data_for_deseq <- data.frame(t(data_for_deseq))
# DESeq does not work with numeric type
# so let's multiply our values and convert them to integers
data_for_deseq <- data_for_deseq * 10000000
data_for_deseq <- data_for_deseq %>%
mutate_if(is.numeric,as.integer)
colnames(data_for_deseq) <- data_without_na$MouseID
samples = colnames(data_for_deseq)
condition = data_without_na$class
colData = data.frame(samples = samples,
condition = condition)
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
dds_c_SC_s = DESeq(dds_c_SC_s, test = 'Wald',
betaPrior = FALSE,
sfType = 'ratio')
dds_c_CS_s = DESeq(dds_c_CS_s, test = 'Wald',
betaPrior = FALSE,
sfType = 'ratio')
dds_t_SC_s = DESeq(dds_t_SC_s, test = 'Wald',
betaPrior = FALSE,
sfType = 'ratio')
dds_t_CS_s = DESeq(dds_t_CS_s, test = 'Wald',
betaPrior = FALSE,
sfType = 'ratio')
resultsNames(dds_t_CS_s)
## [1] "Intercept" "condition_c.SC.s_vs_t.CS.s"
## [3] "condition_c.CS.m_vs_t.CS.s" "condition_c.CS.s_vs_t.CS.s"
## [5] "condition_c.SC.m_vs_t.CS.s" "condition_t.CS.m_vs_t.CS.s"
## [7] "condition_t.SC.m_vs_t.CS.s" "condition_t.SC.s_vs_t.CS.s"
res_c_SC <- results(dds_c_SC_s, alpha = 0.05,
contrast = c("condition",
"t-SC-s", "c-SC-s"))
summary(res_c_SC)
##
## out of 77 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 26, 34%
## LFC < 0 (down) : 27, 35%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1188968)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res_c_CS <- results(dds_c_CS_s, alpha = 0.05,
contrast = c("condition",
"t-CS-s", "c-CS-s"))
summary(res_c_CS)
##
## out of 77 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 18, 23%
## LFC < 0 (down) : 25, 32%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1188968)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res_t_SC <- results(dds_t_SC_s, alpha = 0.05,
contrast = c("condition",
"t-SC-m", "t-SC-s"))
summary(res_t_SC)
##
## out of 77 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 24, 31%
## LFC < 0 (down) : 21, 27%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1188968)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res_t_CS <- results(dds_t_CS_s, alpha = 0.05,
contrast = c("condition",
"t-CS-m", "t-CS-s"))
summary(res_t_CS)
##
## out of 77 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 28, 36%
## LFC < 0 (down) : 21, 27%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 1188968)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
Thus, the level of the following proteins is increased in trisomy mice, not stimulated to learn, comparing to control:
The level of histone AcetylH3K9 also differ in trisomy mice, stimulated to learn, comparing to control. The acetylation of this histone is associated with gene activation, however no evidence of its connection to Down syndrome emergence have been reported so far.
After injection of trisomy mice, not stimulated to learn, with memantine, pPKCG and kinase of ribosomal protein S6 were down-regulated, in contrast to the first comparison.
After injection of trisomy mice, stimulated to learn, with memantine, the level of proto-oncogene serine/threonine-protein kinase BRAF was elevated. This protein plays a role in cell growth by sending signals inside the cell promoting, among other functions, cell division.
Thank you for your time and attention, so to say! :)